Wisconsin Prognosis

Libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
#pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

The data

dataBreast <- read.csv("~/GitHub/RISKPLOTS/DATA/wpbc.data", header=FALSE)
table(dataBreast$V2)
## 
##   N   R 
## 151  47
rownames(dataBreast) <- dataBreast$V1
dataBreast$V1 <- NULL
dataBreast$status <- 1*(dataBreast$V2=="R")
dataBreast$V2 <- NULL
dataBreast$time <- dataBreast$V3
dataBreast$V3 <- NULL
dataBreast <- sapply(dataBreast,as.numeric)
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
dataBreast <- as.data.frame(dataBreast[complete.cases(dataBreast),])
table(dataBreast$status)
## 
##   0   1 
## 148  46

Modeling

ml <- BSWiMS.model(Surv(time,status)~1,data=dataBreast)

[++++]

sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower HR upper u.Accuracy r.Accuracy
V26 8.07e-03 1 1.01 1.01 0.593 0.237
V27 4.13e-04 1 1.00 1.00 0.608 0.237
V24 7.71e-03 1 1.01 1.01 0.598 0.634
V35 8.65e-06 1 1.00 1.00 0.727 0.237
V34 9.13e-03 1 1.01 1.02 0.634 0.598
Table continues below
  full.Accuracy u.AUC r.AUC full.AUC IDI NRI z.IDI
V26 0.593 0.598 0.500 0.598 0.0626 0.393 2.77
V27 0.608 0.608 0.500 0.608 0.0563 0.434 2.76
V24 0.603 0.609 0.618 0.613 0.0532 0.323 2.62
V35 0.727 0.641 0.500 0.641 0.0289 0.565 2.28
V34 0.603 0.618 0.609 0.613 0.0233 0.411 2.13
  z.NRI Delta.AUC Frequency
V26 2.38 0.09827 1
V27 2.63 0.10840 1
V24 1.94 -0.00529 1
V35 3.50 0.14116 1
V34 2.47 0.00338 1

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events withing 5 years

index <- predict(ml,dataBreast)
timeinterval <- 2*mean(subset(dataBreast,status==1)$time)

h0 <- sum(dataBreast$status & dataBreast$time <= timeinterval)
h0 <- h0/sum((dataBreast$time > timeinterval) | (dataBreast$status==1))
pander::pander(t(c(h0=h0,timeinterval=timeinterval)),caption="Initial Parameters")
Initial Parameters
h0 timeinterval
0.323 51.1
rdata <- cbind(dataBreast$status,ppoisGzero(index,h0))
rownames(rdata) <- rownames(dataBreast)

rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90),
                     timetoEvent=dataBreast$time,
                     title="Raw Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

As we can see the Observed probability as well as the Time vs. Events are not calibrated.

Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.41827 0.2616 0.164 1.58e-01 0.48819
RR 2.09244 2.2857 5.172 2.50e+01 2.14188
RR_LCI 1.24116 1.3037 0.753 5.22e-02 1.16605
RR_UCI 3.52758 4.0074 35.524 1.20e+04 3.93434
SEN 0.26087 0.6957 0.978 1.00e+00 0.15217
SPE 0.89189 0.5608 0.128 6.76e-02 0.94595
BACC 0.57638 0.6282 0.553 5.34e-01 0.54906
NetBenefit 0.00257 0.0463 0.101 1.03e-01 -0.00324
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.814 0.596 1.09 0.183
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
0.998 0.999 0.944 1.06
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.902 0.902 0.893 0.91
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.677 0.676 0.589 0.754
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.635 0.543 0.727
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.261 0.143 0.411
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.838 0.942
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.419
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 11.670372 on 1 degrees of freedom, p = 0.000635
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 167 34 41.11 1.23 11.7
class=1 27 12 4.89 10.33 11.7

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,dataBreast,"status","time")

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.3 0.931 48.6

The RRplot() of the calibrated model

h0 <- calprob$h0
timeinterval <- calprob$timeInterval;

rdata <- cbind(dataBreast$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90),
                     timetoEvent=dataBreast$time,
                     title="Calibrated Train: Breast",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Calibrated Train Performance

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.39623 0.2461 0.154 1.48e-01 0.5001
RR 2.09244 2.2857 5.172 2.50e+01 2.7222
RR_LCI 1.24116 1.3037 0.753 5.22e-02 1.5655
RR_UCI 3.52758 4.0074 35.524 1.20e+04 4.7336
SEN 0.26087 0.6957 0.978 1.00e+00 0.1522
SPE 0.89189 0.5608 0.128 6.76e-02 0.9662
BACC 0.57638 0.6282 0.553 5.34e-01 0.5592
NetBenefit 0.00774 0.0556 0.111 1.13e-01 0.0103
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.832 0.609 1.11 0.226
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.02 1.02 0.96 1.08
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.932 0.932 0.923 0.941
pander::pander(t(rrAnalysisTrain$c.index$cstatCI),caption="C. Index")
C. Index
mean.C Index median lower upper
0.677 0.678 0.599 0.751
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.635 0.543 0.727
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.261 0.143 0.411
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.838 0.942
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.397
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 11.670372 on 1 degrees of freedom, p = 0.000635
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 167 34 41.11 1.23 11.7
class=1 27 12 4.89 10.33 11.7

Cross-Validation

Here we use the estimated h0 and timeinterval from the full set

rcv <- randomCV(theData=dataBreast,
                theOutcome = Surv(time,status)~1,
                fittingFunction=BSWiMS.model, 
                trainFraction = 0.9,
                repetitions=100,
                classSamplingType = "Pro"
         )

.[+++++].[+++++].[++++].[+++++++].[++++].[++++++].[+++++].[+++++].[++++++++++]..[+++++]10 Tested: 128 Avg. Selected: 6 Min Tests: 1 Max Tests: 4 Mean Tests: 1.5625 . MAD: 0.4786672

.[+].[+++++++++].[++++++].[+++++].[++++++++].[+].[++++].[++++].[+++++].[++++++]20 Tested: 171 Avg. Selected: 5.6 Min Tests: 1 Max Tests: 7 Mean Tests: 2.339181 . MAD: 0.4870576

.[+++].[–].[+++].[++++].[++++].[++].[+++].[++++++++].[++++].[+++++++]30 Tested: 188 Avg. Selected: 5.2 Min Tests: 1 Max Tests: 10 Mean Tests: 3.191489 . MAD: 0.4797659

.[++++++].[+++].[++++].[++].[++++].[+++++].[++].[++++].[++++].[++++]40 Tested: 191 Avg. Selected: 5.05 Min Tests: 1 Max Tests: 12 Mean Tests: 4.188482 . MAD: 0.4855026

.[+++].[++].[–].[++++].[+++].[++++++].[++].[+++].[–].[+++++++]50 Tested: 194 Avg. Selected: 4.72 Min Tests: 1 Max Tests: 13 Mean Tests: 5.154639 . MAD: 0.4836818

.[+++].[+++++].[+].[+++].[+++++].[++].[++++].[++++].[+++].[+++]60 Tested: 194 Avg. Selected: 4.516667 Min Tests: 1 Max Tests: 14 Mean Tests: 6.185567 . MAD: 0.4843055

.[+++++++].[++++++++].[+++++].[++++++].[++++].[+++++].[+++].[++++].[+++].[+++]70 Tested: 194 Avg. Selected: 4.628571 Min Tests: 1 Max Tests: 15 Mean Tests: 7.216495 . MAD: 0.4841789

.[+++++++].[+].[++++++].[+++++++].[+++].[+++].[+++++].[+++++++].[++++].[+++]80 Tested: 194 Avg. Selected: 4.6625 Min Tests: 3 Max Tests: 16 Mean Tests: 8.247423 . MAD: 0.4852751

.[+++++].[++++++].[+++++++].[–].[++++++].[+++++].[++++++].[++++++].[+++++++].[++++++++]90 Tested: 194 Avg. Selected: 4.811111 Min Tests: 3 Max Tests: 18 Mean Tests: 9.278351 . MAD: 0.4837295

.[+++].[+].[++++++].[++++++].[++++].[+++++++].[+++++++].[+++].[++++].[+++]100 Tested: 194 Avg. Selected: 4.85 Min Tests: 4 Max Tests: 20 Mean Tests: 10.30928 . MAD: 0.483165

stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]

bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)

rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names

rrAnalysisTest <- RRPlot(rdatacv,atThr = rrAnalysisTrain$thr_atP,
                     timetoEvent=times,
                     title="Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Cross-Validation Test Performance

pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.397 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.396 0.2349 0.158 1.36e-01 0.498353
RR 1.653 2.2562 2.671 1.22e+01 2.275000
RR_LCI 0.946 1.2456 0.697 2.62e-02 1.213414
RR_UCI 2.888 4.0866 10.227 5.65e+03 4.265343
SEN 0.239 0.7391 0.957 1.00e+00 0.130435
SPE 0.865 0.5000 0.128 3.38e-02 0.959459
BACC 0.552 0.6196 0.542 5.17e-01 0.544947
NetBenefit -0.011 0.0582 0.102 1.21e-01 0.000209
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.819 0.6 1.09 0.204
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.01 1.01 0.951 1.07
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.887 0.887 0.874 0.9
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.656 0.656 0.569 0.735
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.603 0.508 0.698
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.217 0.109 0.364
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.865 0.799 0.915
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.397
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 3.688241 on 1 degrees of freedom, p = 0.054797
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 164 36 40.28 0.454 3.69
class=1 30 10 5.72 3.194 3.69

Calibrating the test results

rdatacv <- cbind(status,prob,times)
calprob <- CalibrationProbPoissonRisk(rdatacv)

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.323 1 49.1
timeinterval <- calprob$timeInterval;

rdata <- cbind(status,calprob$prob)


rrAnalysisTest <- RRPlot(rdata,atRate=c(0.90),
                     timetoEvent=times,
                     title="Calibrated Test: Breast",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

### Calibrated Test Performance

pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.40764 0.2349 0.158 1.36e-01 0.498353
RR 1.87778 2.2562 2.671 1.22e+01 2.275000
RR_LCI 1.07177 1.2456 0.697 2.62e-02 1.213414
RR_UCI 3.28994 4.0866 10.227 5.65e+03 4.265343
SEN 0.21739 0.7391 0.957 1.00e+00 0.130435
SPE 0.89865 0.5000 0.128 3.38e-02 0.959459
BACC 0.55802 0.6196 0.542 5.17e-01 0.544947
NetBenefit -0.00165 0.0582 0.102 1.21e-01 0.000209
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.827 0.606 1.1 0.227
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.02 1.02 0.962 1.09
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.891 0.891 0.877 0.904
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.656 0.656 0.575 0.741
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.603 0.508 0.698
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.196 0.0936 0.339
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.838 0.942
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.409
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 5.544463 on 1 degrees of freedom, p = 0.018539
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 170 37 41.65 0.52 5.54
class=1 24 9 4.35 4.98 5.54